* ---------------- combine (append) all data sets
* NOTE: state info from ANES should be corrected;

* Load the TESS 2016 file as the base, then stack the other processed
* survey files underneath it in the order listed.
use "${dir_processed}/tess_2016", clear
append using ///
	"${dir_processed}/tess_2010" ///
	"${dir_processed}/gss_data1" ///
	"${dir_processed}/gss_data2" ///
	"${dir_processed}/cnes_1992" ///
	"${dir_processed}/anes_2000" ///
	"${dir_processed}/anes_2006" ///
	"${dir_processed}/anes_2008"


* adjust survey weights
* Build a single weight variable (wt) from the source-specific weight
* columns. Rows matched by none of the rules below keep a missing wt.

* wtall is the weight for GSS, ANES 2006/2008 and TESS 2010 ...
gen wt = wtall if dataset == "GSS" ///
	| (dataset == "ANES" & inlist(year, 2006, 2008)) ///
	| (dataset == "TESS" & year == 2010)

* ... except GSS 2010, which uses wtpan123 instead
replace wt = wtpan123 if dataset == "GSS" & year == 2010

* NOTE(review): this assigns wt to itself, so it changes nothing; presumably
* a source-specific weight was intended for ANES 1980/1998 -- confirm.
replace wt = wt if dataset == "ANES" & inlist(year, 1980, 1998)

replace wt = wt_post if dataset == "ANES" & year == 2000
replace wt = weight  if dataset == "TESS" & year == 2016

* every 1992 row gets a unit weight
replace wt = 1 if year == 1992

* survey set-up
svyset [pweight = wt]

* assign analytic years for easier display
* ayear is a copy of year, except that TESS 2010 is coded 2011 so that it
* gets its own value label, separate from the 2010 "GSS2010" label.
gen ayear = cond(dataset == "TESS" & year == 2010, 2011, year)

label define ayear ///
	1985 "GSS1985" 1987 "GSS1987" 1992 "CNES1992" ///
	2000 "ANES2000" 2004 "GSS2004" 2006 "ANES2006" 2008 "ANES2008" ///
	2010 "GSS2010"  2011 "TESS2010" 2016 "TESS2016"
label values ayear ayear


* adjust TESS 2016 module
* Wherever module is observed, k_imp is reset to 1 when module == 1 and to 0
* otherwise; rows with missing module keep their existing k_imp.
replace k_imp = (module == 1) if module < .

* k_pol is the complement of k_imp (missing where k_imp is missing)
gen k_pol = 1 - k_imp

* pid conversion ----------------
* pid_int: absolute distance of the 1-7 party id item from its midpoint (4)
gen pid_int = abs(r_partyid7 - 4)

* pid3: collapse party id to three groups (1-2, 3-5, 6-7)
recode r_partyid7 (1/2=1) (3/5=2) (6/7=3), gen(pid3)

* pid_ind: 1 when pid_int is below 2 (i.e. r_partyid7 is 3, 4 or 5), else 0
gen pid_ind = (pid_int < 2) if ~missing(pid_int)

* ideo_int: absolute distance of r_ideo from 4
gen ideo_int = abs(r_ideo - 4)

label variable ideo_int "Ideological Strength"
label define pid3 1 "Democrat" 2 "Independent" 3 "Republican"
label values pid3 pid3

* ----------------------------------------------------
* variable label
* ----------------------------------------------------
* respondent demographics
label variable r_age    "Age"
label variable r_educ   "Education"
label variable r_female "Female"

* race: categorical variable plus its indicator variables
label variable r_race   "Race"
label define r_race 1 "White" 2 "Black" 3 "Other" 4 "missing"
label values r_race r_race
label variable r_white  "White"
label variable r_black  "Black"
label variable r_others "Other race"

* marital status: categorical variable plus its indicator variables
* (label text corrected from the misspelled "Martial Stuas")
label var r_marital "Marital Status"
	label var r_married "Currently Married"
	label var r_single "Never Married"

* household composition
label variable r_child "Number of Children"
label variable r_adults "Number of Adults in Household"

* working status: categorical variable plus its indicator variables
label variable r_wrkstat "Working Status"
label variable r_working "Job status: Working"
label variable r_unemployed "Job status: Unemployed"
label variable r_retired "Job status: Retired"
label variable r_otherworks "Job status: Others"

* religion and network size
label variable r_attendance "Religious Service Attendance"
label variable n_size "Network Size"

* political orientation and engagement
label variable r_partyid7 "Party ID"
label variable r_ideo "Ideology"
label variable r_pol_discuss "Political Discussion"
label variable r_pol_interest "Political Interest"
label variable r_talk_politics "Political Talk: yes"
label variable r_talk_freq "Political Talk: frequency"

* interviewer characteristics
label variable i_age "Interviewer : Age"
label variable i_female "Interviewer : Female"
label variable i_race "Interviewer : Race"
label define i_race ///
	1 "Interviewer:White" ///
	2 "Interviewer:Black" ///
	3 "Interviewer:Other"
label values i_race i_race

* interview conditions
label variable i_tenure "Interviewer: Experience (Years)"
label variable i_phonemode "Telephone Mode"
label variable i_uncoop "Uncooperativeness"
label variable i_poorcomprend "Poor Comprehension"

* categorized count of prior SAQ skips
label variable i_numskip_c "Prior SAQ Skips"
label define i_numskip_c ///
	0 "SAQ Skips (None)" ///
	1 "SAQ: 1 Prior Skip" ///
	2 "SAQ: 2 to 3 Prior Skips" ///
	3 "SAQ: 4+ Prior Skips"
label values i_numskip_c i_numskip_c

* ballot position
label define ballot ///
	1 "Ballot One (early position)" ///
	2 "Ballot Two (after voluntary membership)" ///
	3 "Ballot Three (end position)"
label values ballot ballot

* ------------------------------------------------------------
* network size
* ------------------------------------------------------------

* corrected n_size variable : 12 are changed
* (kept for reference: check of how many rows differ)
	*gen correct= n_names != n_size if year == 2016
	*tab correct if ~missing(weight)

* for both TESS waves, n_size is overwritten with n_names
replace n_size = n_names if dataset == "TESS" & inlist(year, 2010, 2016)

* n_zero: 1 when the network size is zero, 0 otherwise, missing if n_size is missing
gen n_zero = (n_size == 0) if ~missing(n_size)
label variable n_zero "Isolation"

* consider talk politics sub-network
* GSS 1987 (up to 3 alters) and CNES 1992 (up to 5 alters)
*
* Each count is the number of a?_talkpol items whose value is in the listed
* set. The -if- qualifier must come BEFORE the comma that starts the options;
* the earlier form ", values(...), if ..." put it inside the option list,
* which is not valid egen syntax.

* 1987: count alters with a?_talkpol in 2-5
egen n_subpol_1987 = anycount(a1_talkpol a2_talkpol a3_talkpol) if year == 1987, values(2 3 4 5)
* set to missing when the first alter's item is missing
replace n_subpol_1987 = . if missing(a1_talkpol)
* explicit guard: the count is defined for 1987 rows only
replace n_subpol_1987 = . if year != 1987

* 1992: count alters with a?_talkpol in 2-3
egen n_subpol_1992 = anycount(a1_talkpol a2_talkpol a3_talkpol a4_talkpol a5_talkpol) if year == 1992, values(2 3)
* set to missing when n_size_pol is missing
replace n_subpol_1992 = . if missing(n_size_pol)
* explicit guard: the count is defined for 1992 rows only
replace n_subpol_1992 = . if year != 1992

* exclude some non-relevant year data
* show the year-by-dataset layout first, then drop the 1986, 1998 and 2014 rows
tabulate year dataset
drop if inlist(year, 1986, 1998, 2014)

* one group id per observed (year, dataset) combination
egen year_dataset = group(year dataset)
label variable year_dataset "Year-Dataset"

* network size indicators
* n_important: network size for the years listed below; missing elsewhere.
gen n_important = .
* years where n_size is used directly
replace n_important = n_size if inlist(year, 1985, 1987, 2004, 2010)
* 1992 uses its own n_size_imp column
replace n_important = n_size_imp if year == 1992
* 2006 and 2016: only rows with k_imp == 1
replace n_important = n_size if inlist(year, 2006, 2016) & k_imp == 1

* n_politics: network size for the years listed below; missing elsewhere.
gen n_politics = .
* 1987 and 1992 use the talk-politics sub-network counts
replace n_politics = n_subpol_1987 if year == 1987
replace n_politics = n_subpol_1992 if year == 1992
* years where n_size is used directly
replace n_politics = n_size if inlist(year, 2000, 2008)
* 2006 and 2016: only rows with k_imp == 0
replace n_politics = n_size if inlist(year, 2006, 2016) & k_imp == 0

* duplicate some years : 1992 CNES, 1987 GSS to consider political subnetworks as separate data sets
* expand_new is 0 for original rows and 1 for the added copies
expand 2 if inlist(year, 1987, 1992), generate(expand_new)

* original 1992 rows carry n_size_imp; the copies carry the
* talk-politics sub-network counts
replace n_size = n_size_imp    if year == 1992 & expand_new == 0
replace n_size = n_subpol_1992 if year == 1992 & expand_new == 1
replace n_size = n_subpol_1987 if year == 1987 & expand_new == 1

* imp_name: 0 for duplicated rows and for all ANES rows; otherwise k_imp,
* with a missing k_imp treated as 1
gen imp_name = cond(expand_new == 1 | dataset == "ANES", 0, cond(missing(k_imp), 1, k_imp))
label variable imp_name "name generator : imp"

* adjust network size
* Top-coded copies: values above the cap are set to the cap; missing values
* stay missing. Loop order matches the order the variables are created in.
foreach top in 4 3 {
	gen n_politics`top' = cond(n_politics > `top' & ~missing(n_politics), `top', n_politics)
}

foreach top in 3 4 6 {
	gen n_important`top' = cond(n_important > `top' & ~missing(n_important), `top', n_important)
}

* zero-size indicators (missing when the underlying size is missing)
gen n0_politics  = (n_politics == 0)  if ~missing(n_politics)
gen n0_important = (n_important == 0) if ~missing(n_important)

* for comparison across two data
* any existing n_size3 / n_size6 is dropped and rebuilt from the current n_size
foreach top in 3 6 {
	capture drop n_size`top'
	gen n_size`top' = cond(n_size > `top' & ~missing(n_size), `top', n_size)
}

* ------------------------------------------------------------
* political homophily
* ------------------------------------------------------------
* Recode each alter's a?_partyid into a 3-category version
* (1 -> 1, 2 -> 3, 3/4 -> 2) ...
forvalues k = 1/3 {
	recode a`k'_partyid (1=1) (2=3) (3/4=2), gen(aa`k'_partyid3)
}

* ... and use it to overwrite a?_partyid3 on the 1987 rows
forvalues k = 1/3 {
	replace a`k'_partyid3 = aa`k'_partyid3 if year == 1987
}

* on 2016 rows the first alter's party id comes from a0_partyid3
replace a1_partyid3 = a0_partyid3 if year == 2016

*tab pid3 a1_partyid3 if year == 1987,row
*tab pid3 a1_partyid3 if year == 2008,row
*tab pid3 a1_partyid3 if year == 2016,row

* same?_pid3: 1 when the alter's party matches the respondent's pid3,
* 0 when it differs, missing when either side is missing
forvalues k = 1/3 {
	gen same`k'_pid3 = (a`k'_partyid3 == pid3) if ~missing(a`k'_partyid3) & ~missing(pid3)
}

* row mean over all variables matching same?_pid3
egen p_same_pid3 = rowmean(same?_pid3)

* byear: copy of ayear, with 2016 rows that have k_imp == 0 recoded to 2017
gen byear = cond(ayear == 2016 & k_imp == 0, 2017, ayear)

label variable p_same_pid3 "% Same Party ID"

* --------------- by voting : CNES 1992 / ANES 2000 : check later
* Diagnostic cross-tabulations only; nothing in the data is modified here.
* a?_vote_same : CNES 1992
tabulate r_bush a1_bush // exclude nonvoters by >0 : CNES 1992
tabulate r_bush a1_vote_bush // exclude nonvoters by >0 : ANES 2000
* a?_vote_bush_a
tabulate a1_vote_same_a year

* -----------------------for dyadic level analysis
* party homophily : 1987, 2008, and 2016
* voting homophily : 1992, 2000,

* relationship type : across all
* ------- CNES 1992 data
* For each of the five alters, turn a?_type into indicators on 1992 rows
* with a non-missing type: 1 -> spouse, 2 -> family, 3 -> unrelated.
* a?_spouse already exists and is overwritten; the other two are created.
forvalues k = 1/5 {
	replace a`k'_spouse = (a`k'_type == 1) if year == 1992 & ~missing(a`k'_type)
	gen a`k'_family = (a`k'_type == 2) if year == 1992 & ~missing(a`k'_type)
	gen a`k'_unrelated = (a`k'_type == 3) if year == 1992 & ~missing(a`k'_type)
	* other relationship variables listed for this data set:
	* a?_coworker
	* a?_friend
	* a?_same_church
	* a?_neighbor
	* a?_closefriend
}

* ------- ANES 2000 data
* a?_relative a?_neighbor a?_coworker  a?_same_church 

* ------- ANES 2008 data 
* a?_live_together a?_relative a?_same_church 

* ------- GSS 1985/1987/2004/2010
* a`i'_spouse  
* a`i'_parent  
* a`i'_sibling 
* a`i'_child   
* a`i'_othfam  
* a`i'_cowork  
* a`i'_neighbor
* a`i'_friend  
* a`i'_advisor 
* a`i'_other   

* relationship type : TESS 2010 / 2016
* diagnostic tabulations before recoding
tabulate a_type
tabulate a0_rel if ~missing(weight) & year == 2016 & k_imp == 1
tabulate a0_rel if ~missing(weight) & year == 2016 & k_imp == 0

* NOTE(review): this applies to every 2010 row, not only TESS -- confirm
* that a_type / a0_rel are missing on the GSS 2010 rows.
replace a0_rel = a_type if year == 2010
*                           235         1  Parent
*                           178         2  Sibling
*                           444         3  Spouse/Romantic partner
*                           149         4  Child
*                           139         5  Other family member
*                            80         6  Coworker
*                           619         7  Friend
*                            25         8  Neighbor
*                            99         9  Other

* Set the first-alter relationship indicators from a0_rel wherever a0_rel is
* observed. The names are listed in code order (1 = parent, ..., 9 = other).
local code = 0
foreach rel in parent sibling spouse child othfam cowork friend neighbor other {
	local ++code
	replace a1_`rel' = (a0_rel == `code') if ~missing(a0_rel)
}

* --------------- across all; "relative" versus "non-relative" variables (ANES 2000;ANES 2008)
* TESS: the first alter is a relative when a0_rel is in 1-5.
* Rows with a missing a0_rel are left untouched. Stata treats a missing value
* as larger than any number, so the unguarded comparison
* (a0_rel >= 1 & a0_rel <= 5) evaluated to 0 on those rows and coded them
* as "non-relative". The guard matches the a1_* indicators set from a0_rel.
replace a1_relative = inrange(a0_rel, 1, 5) if dataset == "TESS" & ~missing(a0_rel)

* GSS: an alter is a relative when any of the five family indicators is set.
forvalues i = 1/5{
	* rowtotal() treats missing as 0, so also count the non-missing components
	egen aa`i'_relative = rowtotal(a`i'_spouse a`i'_child a`i'_parent a`i'_sibling a`i'_othfam)
	egen aa`i'_m = rownonmiss(a`i'_spouse a`i'_child a`i'_parent a`i'_sibling a`i'_othfam)
	* all five components missing -> the total is unknown, not zero
	replace aa`i'_relative = . if aa`i'_m == 0 & dataset == "GSS"
	
	replace a`i'_relative = aa`i'_relative > 0 if ~missing(aa`i'_relative) & dataset == "GSS"
}

* shrink storage types, then write the cleaned file (overwrites any existing copy)
compress
save "${dir_processed}/tess_cleaned.dta", replace 


